title: “analysis_Theron_08262021” author: “Theron Palmer” date: “8/26/2021” output: html_document: toc: yes toc_float: yes


Analysis Pipeline

On ZAPPA. recount3_tcga_juncs_prep.sh -> recount3_tcga_juncs.sh -> splicemutr_leafcutter.sh -> save_introns.sh -> cong_tcga_junc.R -> tcga_form_transcripts.sh -> process_peps.sh -> runMHCnuggets.sh -> process_percentile.sh -> create_genotypes.sh (TCGA_assign_imm.sh) -> TCGA_assign_imm_py.sh -> TCGA_combine_kmers.sh -> create_junc_expr.sh -> TCGA_gene_expr_per_sample.sh -> calc_gene_metric.sh

create_TCGA_TMB_maf_summary.sh

Loading in necessary libraries

## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.10.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:IRanges':
## 
##     desc
## The following object is masked from 'package:stats':
## 
##     filter

The TCGA MAF summary file

maf_file <- "/media/theron/One_Touch/TCGA_junctions/maf_summary.txt"
mc3_maf = read.table(maf_file,header=T)
mc3_maf$Tumor_Sample_ID <- vapply(TCGAbarcode(mc3_maf$Tumor_Sample_Barcode,sample=T),
                                  function(val){substr(val,1,nchar(val)-1)},
                                  character(1))
rownames(mc3_maf) <- mc3_maf$Tumor_Sample_Barcode
mc3_maf$sample_ID <- TCGAbarcode(mc3_maf$Tumor_Sample_Barcode,sample=T)

mut_sig_perc <- readRDS("/media/theron/One_Touch/TCGA_junctions/TCGA_cancers/mut_sig_percentages.rds")
mut_sig_perc$sample_ID<-TCGAbarcode(rownames(mut_sig_perc),sample=T)
apobec <- c("T[C>T]A","T[C>T]T","T[C>G]A","T[C>G]T")

Accumulating mutation data per sample only annotated

junc summary mean

TCGA_cibersort_all <- read.table("/media/theron/One_Touch/TCGA_junctions/ext_dat/TCGA.Kallisto.fullIDs.cibersort.relative.tsv",header=T)
TCGA_cibersort_all$SampleID <- str_replace_all(TCGA_cibersort_all$SampleID,"[.]","-")
cibersort_cells <- c("sample_ID","B.cells.naive","B.cells.memory","Plasma.cells",
                     "T.cells.CD8","T.cells.CD4.naive","T.cells.CD4.memory.resting",
                     "T.cells.CD4.memory.activated","T.cells.follicular.helper",
                     "T.cells.regulatory..Tregs.","T.cells.gamma.delta","NK.cells.resting",
                     "NK.cells.activated","Monocytes","Macrophages.M0","Macrophages.M1",
                     "Macrophages.M2","Dendritic.cells.resting","Dendritic.cells.activated",
                     "Mast.cells.resting","Mast.cells.activated","Eosinophils","Neutrophils")

tumor_data_file <- "/media/theron/One_Touch/TCGA_junctions/TCGA_cancers/JHPCE/cancer_dirs_filt.txt"
tumor_data <- read.table(tumor_data_file)
cancers <- tumor_data$V1

splicing_antigenicity_tum <- data.frame(cancers)

TMB_all <- data.frame(cancers)
TMB_all$av_cor <- NA
TMB_all$av_pval <- NA
TMB_all$med_cor <- NA
TMB_all$med_pval <- NA
TMB_all$max_cor <- NA
TMB_all$max_pval <- NA
rownames(TMB_all) <- cancers
all_genes <- c()

TCGA_ROOT_DIR="/media/theron/One_Touch/TCGA_junctions/TCGA_cancers"

for (i in seq(length(cancers))){
  cancer <- cancers[i]
  print(sprintf("%s: %d out of %d",cancer,i,length(cancers)))
  tumor_geno <- read.table(sprintf("%s/%s/JHPCE/%s_genotypes_specific.txt",TCGA_ROOT_DIR,cancer,cancer),header=T)
  tumor_geno$sample_ID <- TCGAbarcode(tumor_geno$aliquot_id,sample = T)
  tumor_geno <- tumor_geno %>% dplyr::filter(type=="T")
  tumor_geno <- tumor_geno[complete.cases(tumor_geno),]
  
  mc3_maf_small<-subset(mc3_maf,sample_ID %in% tumor_geno$sample_ID)
  TCGA_cibersort_all$sample_ID <- TCGAbarcode(TCGA_cibersort_all$SampleID,sample = T)
  TCGA_cibersort_small<-subset(TCGA_cibersort_all,sample_ID %in% tumor_geno$sample_ID)

  mc3_maf_small <- mc3_maf_small[complete.cases(mc3_maf_small),]
  mc3_maf_small$type <- vapply(rownames(mc3_maf_small),function(barcode){
    type<-as.numeric(substr(strsplit(barcode,"-")[[1]][4],1,2))
    if (type <= 9){
      return("T")
    } else if (type > 9 & type <= 19){
      return ("N")
    } else {
      return ("C")
    }
  },character(1))
  mc3_maf_small$TMB<-(mc3_maf_small$total)/((50353573)/(1*10**6))
  mc3_maf_small <- mc3_maf_small %>% dplyr::filter(type == "T")

  splice_mut_file <- sprintf("%s/%s/JHPCE/GENE_METRIC/filenames_tumor.txt",TCGA_ROOT_DIR,cancer)
  splice_mut_files <- read.table(splice_mut_file)
  for (j in seq(length(splice_mut_files$V1))){
    if (j == 1){
      combined_gene_metric <- readRDS(splice_mut_files$V1[j])
    } else {
      if (cancer=="LUAD" & j==7){
        break
      }
      a<-readRDS(splice_mut_files$V1[j])
      combined_gene_metric <- cbind(combined_gene_metric,a)
    }
  }
  # combined_gene_metric[combined_gene_metric==-Inf]<-0
  # combined_gene_metric[combined_gene_metric==Inf]<-0
  # combined_gene_metric[is.na(combined_gene_metric)]<-0
  
  splice_mut_file <- sprintf("%s/%s/JHPCE/GENE_METRIC/filenames_normal.txt",TCGA_ROOT_DIR,cancer)
  splice_mut_files <- read.table(splice_mut_file)
  for (j in seq(length(splice_mut_files$V1))){
    if (j == 1){
      combined_gene_metric_normal <- readRDS(splice_mut_files$V1[j])
    } else {
      if (cancer=="LUAD" & j==7){
        break
      }
      a<-readRDS(splice_mut_files$V1[j])
      combined_gene_metric_normal <- cbind(combined_gene_metric_normal,a)
    }
  }
  # combined_gene_metric_normal[combined_gene_metric_normal==-Inf]<-0
  # combined_gene_metric_normal[is.na(combined_gene_metric_normal)]<-0
  
  tumor_geno <- tumor_geno[tumor_geno$external_id %in% colnames(combined_gene_metric),]
  external_ids <- tumor_geno$external_id
  combined_gene_metric <- combined_gene_metric[,external_ids]
  combined_gene_metric_normal <- combined_gene_metric_normal[,external_ids]
  conglomerate_genes <- unique(rownames(combined_gene_metric),rownames(combined_gene_metric_normal))
  combined_gene_metric_tumor <- combined_gene_metric[conglomerate_genes,]
  # combined_gene_metric_tumor[is.na(combined_gene_metric_tumor)]<-0
  combined_gene_metric_normal <- combined_gene_metric_normal[conglomerate_genes,]
  # combined_gene_metric_normal[is.na(combined_gene_metric_normal)]<-0
  combined_gene_metric_tumor <- apply(combined_gene_metric_tumor,1,mean,na.rm=T)
  combined_gene_metric_normal <- apply(combined_gene_metric_normal,1,mean,na.rm=T)
  combined_gene_metric_DA <- data.frame(DA=(combined_gene_metric_tumor)/(combined_gene_metric_normal))
  
  print(ggplot(combined_gene_metric_DA,aes(x=log2(DA)))+geom_histogram()+scale_y_log10()+ylab("log10(counts)")+xlab("Differential Agretopicity"))
  
  combined_gene_metric[combined_gene_metric==-Inf]<-0
  combined_gene_metric[combined_gene_metric==Inf]<-0
  combined_gene_metric[is.na(combined_gene_metric)]<-0
  
  combined_gene_metric_DA$genes <- rownames(combined_gene_metric_DA)
  high_DA_genes <-  combined_gene_metric_DA[combined_gene_metric_DA$DA>0,"genes"]
  high_DA_genes <- high_DA_genes[!is.na(high_DA_genes)]
  all_genes <- unique(c(all_genes,rownames(combined_gene_metric)))
  

  colnames(combined_gene_metric) <- vapply(colnames(combined_gene_metric),function(col_name){
    tumor_geno$sample_ID[which(tumor_geno$external_id == col_name)[1]]
  },character(1))

  mc3_maf_small <- mc3_maf_small %>% dplyr::filter(sample_ID %in% colnames(combined_gene_metric))
  TCGA_cibersort_small <- TCGA_cibersort_all %>% dplyr::filter(sample_ID %in% colnames(combined_gene_metric))

  combined_gene_metric<-combined_gene_metric[high_DA_genes,mc3_maf_small$sample_ID]
  #combined_gene_metric<-combined_gene_metric[,mc3_maf_small$sample_ID]
  
  combined_gene_metric_per_sample<-data.frame(colnames(combined_gene_metric))
  combined_gene_metric_per_sample$av <- vapply(seq(ncol(combined_gene_metric)),
                                               function(col_val){mean(as.numeric(combined_gene_metric[,col_val]),na.rm=T)},
                                               numeric(1))
  combined_gene_metric_per_sample$med <- vapply(seq(ncol(combined_gene_metric)),
                                               function(col_val){median(as.numeric(combined_gene_metric[,col_val]),na.rm=T)},
                                               numeric(1))
  combined_gene_metric_per_sample$max <- vapply(seq(ncol(combined_gene_metric)),
                                               function(col_val){max(as.numeric(combined_gene_metric[,col_val]),na.rm=T)},
                                               numeric(1))
  combined_gene_metric_per_sample$sum <- vapply(seq(ncol(combined_gene_metric)),
                                               function(col_val){sum(as.numeric(combined_gene_metric[,col_val]),na.rm=T)},
                                               numeric(1))
  combined_gene_metric_per_sample$TMB <- mc3_maf_small$TMB
  combined_gene_metric_per_sample$cancer <- cancer
  colnames(combined_gene_metric_per_sample) <- c("sample","gene_metric_av","gene_metric_med",
                                                 "gene_metric_max","gene_metric_sum","TMB","cancer")
  combined_gene_metric_per_sample$TMB_TYPE <- "LOW"
  combined_gene_metric_per_sample$TMB_TYPE <- vapply(combined_gene_metric_per_sample$TMB,function(val){
    if(val>=10){
      return("TMB HIGH")
    } else {
      return("TMB LOW")
    }},character(1))
  cibersort_per_samp <- lapply(combined_gene_metric_per_sample$sample,
                             function(samp){
                               if (samp %in% TCGA_cibersort_small$sample_ID){
                                 return(TCGA_cibersort_small[which(TCGA_cibersort_small$sample_ID==samp),cibersort_cells,drop=F])
                               } else {
                                 a<-data.frame(t(rep(NA,length(cibersort_cells))))
                                 colnames(a)<-cibersort_cells
                                 return(a)
                               }
                             })
  cibersort_per_samp_df<-cibersort_per_samp[[1]]
  for (k in seq(2,length(cibersort_per_samp))){
    cibersort_per_samp_df <- rbind(cibersort_per_samp_df,cibersort_per_samp[[k]])
  }
  cibersort_per_samp_df[,c("gene_metric_av","TMB","cancer")]<-t(vapply(cibersort_per_samp_df$sample_ID,function(ID){
    as.character(combined_gene_metric_per_sample[combined_gene_metric_per_sample$sample==ID,c("gene_metric_av","TMB","cancer")])
  },character(3)))
  cibersort_per_samp_df$gene_metric_av <- as.numeric(cibersort_per_samp_df$gene_metric_av)
  cibersort_per_samp_df$TMB <- as.numeric(cibersort_per_samp_df$TMB)

  combined_gene_metric_perc <- data.frame(t(vapply(combined_gene_metric_per_sample$sample,function(samp){
    s<<-samp
    a<-which(mut_sig_perc$sample_ID == samp)
    if (length(a)==0){
      return(rep(-1,ncol(mut_sig_perc)-1))
    } else {
      return(as.numeric(mut_sig_perc[a,seq(96)]))
    }
  },numeric(ncol(mut_sig_perc)-1))))
  colnames(combined_gene_metric_perc)<-colnames(mut_sig_perc)[seq(96)]

  if (i == 1){
    combined_gene_metric_per_sample_all <- combined_gene_metric_per_sample
    combined_gene_metric_perc_all <-  combined_gene_metric_perc
  } else {
    combined_gene_metric_per_sample_all <- rbind(combined_gene_metric_per_sample_all,combined_gene_metric_per_sample)
    combined_gene_metric_perc_all <- rbind( combined_gene_metric_perc_all, combined_gene_metric_perc)
  }


  # combined_gene_metric_log10<-as.matrix(log10(combined_gene_metric+1))
  # colnames(combined_gene_metric_log10)<-colnames(combined_gene_metric)
  #
  # print(Heatmap(combined_gene_metric_log10,
  #         top_annotation = HeatmapAnnotation(TMB=anno_barplot(mc3_maf_small$TMB)),
  #         show_row_names=F,
  #         show_column_names = F,
  #         cluster_rows=T,
  #         cluster_columns=T))


  a<-cor.test(combined_gene_metric_per_sample$gene_metric_av,combined_gene_metric_per_sample$TMB,method="pearson")
  TMB_all[cancer,"av_cor"]<-a$estimate
  TMB_all[cancer,"av_pval"]<-a$p.value

  a<-cor.test(combined_gene_metric_per_sample$gene_metric_med,combined_gene_metric_per_sample$TMB,method="pearson")
  TMB_all[cancer,"med_cor"]<-a$estimate
  TMB_all[cancer,"med_pval"]<-a$p.value

  a<-cor.test(combined_gene_metric_per_sample$gene_metric_max,combined_gene_metric_per_sample$TMB,method="pearson")
  TMB_all[cancer,"max_cor"]<-a$estimate
  TMB_all[cancer,"max_pval"]<-a$p.value

  combined_gene_metric_perc_cor_pval <- data.frame(t(vapply(seq(ncol(mut_sig_perc)-1),function(col_val){
    col_val<<-col_val
    ret_val <- c()
    mut_col <- as.numeric(combined_gene_metric_perc[,col_val])

    aa<-data.frame(combined_gene_metric_per_sample$gene_metric_av,mut_col)
    colnames(aa)<-c("gene_metric_av","mut_col")
    aa<-aa %>% dplyr::filter(mut_col >= 0)
    a<-cor.test(aa$gene_metric_av,aa$mut_col,method="pearson")
    ret_val <- c(ret_val,a$estimate,a$p.value)

    aa<-data.frame(combined_gene_metric_per_sample$gene_metric_med,mut_col)
    colnames(aa)<-c("gene_metric_med","mut_col")
    aa<-aa %>% dplyr::filter(mut_col >= 0)
    a<-cor.test(aa$gene_metric_med,aa$mut_col,method="pearson")
    ret_val <- c(ret_val,a$estimate,a$p.value)

    aa<-data.frame(combined_gene_metric_per_sample$gene_metric_max,mut_col)
    colnames(aa)<-c("gene_metric_max","mut_col")
    aa<-aa %>% dplyr::filter(mut_col >= 0)
    a<-cor.test(aa$gene_metric_max,aa$mut_col,method="pearson")
    ret_val <- c(ret_val,a$estimate,a$p.value)
  },numeric(6))))
  colnames(combined_gene_metric_perc_cor_pval)<-c("av_cor","av_pval","med_cor","med_pval","max_cor","max_pval")
  rownames(combined_gene_metric_perc_cor_pval) <- colnames(combined_gene_metric_perc)
  
  combined_gene_metric_per_sample$TMB_TYPE <- factor(combined_gene_metric_per_sample$TMB_TYPE,levels=c("TMB LOW","TMB HIGH"))
  my_comparisons <- list( c("TMB LOW", "TMB HIGH"))
  HIGH_COUNT=length(which(combined_gene_metric_per_sample$TMB_TYPE=="TMB HIGH"))
  LOW_COUNT=length(which(combined_gene_metric_per_sample$TMB_TYPE=="TMB LOW"))
  if (HIGH_COUNT > 0 & LOW_COUNT > 0){
    wixoc_test_val <- compare_means(gene_metric_av ~ TMB_TYPE, 
                             comparisons = my_comparisons, p.adjust.method = "BH", data=combined_gene_metric_per_sample)
    print(ggplot(combined_gene_metric_per_sample,aes(y=gene_metric_av,x=TMB_TYPE))+
      geom_boxplot(fill="grey")+stat_pvalue_manual(wixoc_test_val, 
      y.position = max(combined_gene_metric_per_sample$gene_metric_av)+(max(combined_gene_metric_per_sample$gene_metric_av)/2), step.increase = 0.1,
      label = "p.adj" )+labs(title=sprintf("%s, HIGH: %d, LOW: %d",cancer,HIGH_COUNT,LOW_COUNT))+ ylab("splicing antigenicity"))
  } else {
    print(ggplot(combined_gene_metric_per_sample,aes(y=gene_metric_av,x=TMB_TYPE))+
      geom_boxplot(fill="black",alpha=0.2)+labs(title=sprintf("%s, HIGH: %d, LOW: %d",cancer,HIGH_COUNT,LOW_COUNT))+
      ylab("splicing antigenicity"))
  }
  
  if (i == 1){
    cibersort_per_samp_df_all <- cibersort_per_samp_df
  } else {
    cibersort_per_samp_df_all <- rbind(cibersort_per_samp_df_all,cibersort_per_samp_df)
  }
  
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=B.cells.naive))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+  
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=B.cells.memory))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=Plasma.cells))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=T.cells.CD8))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=T.cells.CD4.naive))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=T.cells.CD4.memory.resting))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=T.cells.CD4.memory.activated))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=T.cells.follicular.helper))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=T.cells.regulatory..Tregs.))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=T.cells.gamma.delta))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=NK.cells.resting))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=NK.cells.activated))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=Monocytes))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=Macrophages.M0))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=Macrophages.M1))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=Macrophages.M2))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=Dendritic.cells.resting))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=Dendritic.cells.activated))+
  #   geom_point()+
  #   stat_cor(method = "pearson",  
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=Mast.cells.resting))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=Mast.cells.activated))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=Eosinophils))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  # 
  # print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=Neutrophils))+
  #   geom_point()+
  #   stat_cor(method = "pearson",
  #            label.x.npc = "left",
  #            label.y.npc = "top")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))


  # print(ggplot(combined_gene_metric_per_sample,aes(x=log10(gene_metric_med+1),y=TMB))+
  #   geom_point()+
  #   stat_cor(method = "pearson")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))
  #
  # print(ggplot(combined_gene_metric_per_sample,aes(x=log10(gene_metric_max+1),y=TMB))+
  #   geom_point()+
  #   stat_cor(method = "pearson")+
  #   geom_smooth(method="lm")+
  #   labs(title=sprintf(cancer)))

  combined_gene_metric_perc_cor_pval <- combined_gene_metric_perc_cor_pval %>% dplyr::filter(av_pval <= 0.05 | med_pval <= 0.05 | max_pval <= 0.05)
  rownames(combined_gene_metric_perc_cor_pval) <- vapply(rownames(combined_gene_metric_perc_cor_pval),function(rname){
    if (rname %in% apobec){
      return(sprintf("%s:abobec",rname))
    } else {
      return(rname)
    }
  },character(1))
  combined_gene_metric_perc_cor <- combined_gene_metric_perc_cor_pval[,c("av_cor","med_cor","max_cor")]
  combined_gene_metric_perc_pval <- combined_gene_metric_perc_cor_pval[,c("av_pval","med_pval","max_pval")]

  if (nrow(combined_gene_metric_perc_cor_pval) > 0){
    print(Heatmap(as.matrix(combined_gene_metric_perc_cor), cell_fun = function(j, i, x, y, w, h, fill) {
      if(combined_gene_metric_perc_pval[i, j] < 0.005) {
          grid.text("**", x, y)
      } else if(combined_gene_metric_perc_pval[i, j] < 0.05) {
          grid.text("*", x, y)
      }
    },name=sprintf("%s: splicemut vs mutation type",cancer),cluster_columns=F))
  } else {
    print(sprintf("%s: no significant mutation trends",cancer))
  }
}
## [1] "BLCA: 1 out of 15"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1677 rows containing non-finite values (stat_bin).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 2 rows containing missing values (geom_bar).

## [1] "BRCA: 2 out of 15"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3302 rows containing non-finite values (stat_bin).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 1 rows containing missing values (geom_bar).

## [1] "CHOL: 3 out of 15"
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning in FUN(X[[i]], ...): NaNs produced
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 705 rows containing non-finite values (stat_bin).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 4 rows containing missing values (geom_bar).

## [1] "COAD: 4 out of 15"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3116 rows containing non-finite values (stat_bin).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 1 rows containing missing values (geom_bar).

## [1] "HNSC: 5 out of 15"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2275 rows containing non-finite values (stat_bin).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 1 rows containing missing values (geom_bar).

## [1] "KICH: 6 out of 15"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2179 rows containing non-finite values (stat_bin).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## [1] "KIRC: 7 out of 15"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2825 rows containing non-finite values (stat_bin).

## [1] "KIRP: 8 out of 15"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2569 rows containing non-finite values (stat_bin).

## [1] "LIHC: 9 out of 15"
## Warning in FUN(X[[i]], ...): NaNs produced
## Warning in FUN(X[[i]], ...): NaNs produced
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1821 rows containing non-finite values (stat_bin).

## [1] "LUAD: 10 out of 15"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2497 rows containing non-finite values (stat_bin).

## [1] "LUSC: 11 out of 15"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2780 rows containing non-finite values (stat_bin).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 2 rows containing missing values (geom_bar).

## [1] "PRAD: 12 out of 15"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2460 rows containing non-finite values (stat_bin).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 1 rows containing missing values (geom_bar).

## [1] "READ: 13 out of 15"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1667 rows containing non-finite values (stat_bin).

## [1] "THCA: 14 out of 15"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2488 rows containing non-finite values (stat_bin).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 1 rows containing missing values (geom_bar).
## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## [1] "UCEC: 15 out of 15"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2810 rows containing non-finite values (stat_bin).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 2 rows containing missing values (geom_bar).

Correlation Datatable

datatable(TMB_all, caption = "TMB")
ggplot(TMB_all,aes(x=av_cor,y=av_pval,label=cancers))+geom_text(check_overlap=T)+ylim(c(0.05,max(TMB_all$av_pval)))

TMB Correlation for all samples

HIGH_COUNT=length(which(combined_gene_metric_per_sample_all$TMB_TYPE=="TMB HIGH"))
LOW_COUNT=length(which(combined_gene_metric_per_sample_all$TMB_TYPE=="TMB LOW"))
my_comparisons <- list( c("TMB LOW", "TMB HIGH"))
wixoc_test_val <- compare_means(gene_metric_av ~ TMB_TYPE, 
                             comparisons = my_comparisons, p.adjust.method = "BH", data=combined_gene_metric_per_sample_all)
print(ggplot(combined_gene_metric_per_sample_all,aes(y=gene_metric_av,x=TMB_TYPE))+
  geom_boxplot(fill="grey")+stat_pvalue_manual(wixoc_test_val, 
  y.position = max(combined_gene_metric_per_sample_all$gene_metric_av)+(max(combined_gene_metric_per_sample_all$gene_metric_av)/2),
  label = "p.adj" )+labs(title=sprintf("all cohorts, HIGH: %d, LOW: %d",HIGH_COUNT,LOW_COUNT))+
    ylab(("splicing antigenicity")))

ggplot(combined_gene_metric_per_sample_all,aes(x=gene_metric_av,y=log10(TMB+1)))+
  geom_point()+
  stat_cor(method = "pearson",label.x.npc="middle",label.y.npc="top")+
  geom_smooth(method="lm")+
    labs("All Samples")
## `geom_smooth()` using formula 'y ~ x'

combined_gene_metric_per_sample_all_sum <- data.frame(cancer=unique(combined_gene_metric_per_sample_all$cancer))
for (cancer in combined_gene_metric_per_sample_all$cancer){
  combined_gene_metric_per_sample_small <- combined_gene_metric_per_sample_all[combined_gene_metric_per_sample_all$cancer==cancer,]
  combined_gene_metric_per_sample_all_sum[combined_gene_metric_per_sample_all_sum$cancer==cancer,"TMB"] <- median(combined_gene_metric_per_sample_small$TMB,na.rm=T)
  combined_gene_metric_per_sample_all_sum[combined_gene_metric_per_sample_all_sum$cancer==cancer,"splicing_antigenicity"] <- mean(combined_gene_metric_per_sample_small$gene_metric_av,na.rm=T)
}
ggplot(combined_gene_metric_per_sample_all_sum,aes(x=splicing_antigenicity,y=TMB,label=cancer))+
  stat_cor(method = "pearson",label.x.npc="middle",label.y.npc="top")+
  geom_smooth(method="lm")+
  geom_text(check_overlap = T)+
  labs("All Samples")+
  xlab("splicing antigenicity")+ylab("median TMB")
## `geom_smooth()` using formula 'y ~ x'

## TMB, splicing antigenicity, and junction types per cancer

combined_gene_metric_per_sample_all_sum <- data.frame(cancer=unique(combined_gene_metric_per_sample_all$cancer))
for (cancer in combined_gene_metric_per_sample_all$cancer){
  combined_gene_metric_per_sample_small <- combined_gene_metric_per_sample_all[combined_gene_metric_per_sample_all$cancer==cancer,]
  combined_gene_metric_per_sample_all_sum[combined_gene_metric_per_sample_all_sum$cancer==cancer,"TMB"] <- median(combined_gene_metric_per_sample_small$TMB,na.rm=T)
  combined_gene_metric_per_sample_all_sum[combined_gene_metric_per_sample_all_sum$cancer==cancer,"splicing_antigenicity"] <- mean(combined_gene_metric_per_sample_small$gene_metric_av,na.rm=T)
}
rownames(combined_gene_metric_per_sample_all_sum) <- combined_gene_metric_per_sample_all_sum$cancer

tumor_data_file <- "/media/theron/One_Touch/TCGA_junctions/TCGA_cancers/JHPCE/cancer_dirs_filt.txt"
tumor_data <- read.table(tumor_data_file)
cancers <- tumor_data$V1
TCGA_ROOT_DIR="/media/theron/One_Touch/TCGA_junctions/TCGA_cancers"
intron_counts_per_cancer <- data.frame(cancers)
rownames(intron_counts_per_cancer)<-intron_counts_per_cancer$cancers
intron_types <- c("annotated","cryptic_fiveprime","cryptic_threeprime","cryptic_unanchored","novel annotated pair")
intron_counts_per_cancer[,intron_types]<-0
intron_counts_per_cancer <- intron_counts_per_cancer[,seq(2,ncol(intron_counts_per_cancer))]

for (cancer in cancers){
  print(cancer)
  cancer_leafcutter_file <- sprintf("%s/%s/JHPCE/leafcutter_out/data.Rdata",TCGA_ROOT_DIR,cancer)
  load(cancer_leafcutter_file)
  introns_tumor <- introns[introns$deltapsi>0,]
  intron_counts_per_cancer[cancer,]<-vapply(intron_types,function(intron_type){
    return(length(which(introns_tumor$verdict==intron_type)))
  },numeric(1))
}
## [1] "BLCA"
## [1] "BRCA"
## [1] "CHOL"
## [1] "COAD"
## [1] "HNSC"
## [1] "KICH"
## [1] "KIRC"
## [1] "KIRP"
## [1] "LIHC"
## [1] "LUAD"
## [1] "LUSC"
## [1] "PRAD"
## [1] "READ"
## [1] "THCA"
## [1] "UCEC"
intron_counts_per_cancer_norm <- intron_counts_per_cancer/apply(intron_counts_per_cancer,1,sum)
intron_counts_per_cancer_scaled <- data.frame(apply(intron_counts_per_cancer_norm,2,scale))
rownames(intron_counts_per_cancer_scaled)<-rownames(intron_counts_per_cancer_norm)
colnames(intron_counts_per_cancer_scaled) <- intron_types
print(Heatmap(t(intron_counts_per_cancer_scaled),
        top_annotation = HeatmapAnnotation(median_TMB=anno_barplot(combined_gene_metric_per_sample_all_sum[rownames(intron_counts_per_cancer),"TMB"])),
        bottom_annotation = HeatmapAnnotation(splicing_antigenicity=anno_barplot(combined_gene_metric_per_sample_all_sum[rownames(intron_counts_per_cancer),"splicing_antigenicity"])),
        show_row_names=T,
        show_column_names = T,
        cluster_rows=T,
        cluster_columns=T,
        heatmap_legend_param=list(title="zscore")))

print(Heatmap(t(log10(intron_counts_per_cancer)),
        top_annotation = HeatmapAnnotation(median_TMB=anno_barplot(combined_gene_metric_per_sample_all_sum[rownames(intron_counts_per_cancer),"TMB"])),
        show_row_names=T,
        show_column_names = T,
        cluster_rows=T,
        cluster_columns=T,
        heatmap_legend_param=list(title="log10(counts)")))

Analyzing Splicing Factor Mutations per cancer type including missense mutations

# Reading in supplementary data

TCGA_mutation_representation <- read_excel("/media/theron/One_Touch/splicing_factor_paper/NIHMS958968-supplement-3.xlsx",skip=2)
TCGA_mutation_representation$sample_ID <- TCGAbarcode(TCGA_mutation_representation$Tumor_Sample_Barcode,sample = T)
TCGA_mutation_representation_filt <- TCGA_mutation_representation[TCGA_mutation_representation$sample_ID %in% combined_gene_metric_per_sample_all$sample,]
cancers <- unique(TCGA_mutation_representation_filt$Cohort)
splicing_factor_genes <- unique(TCGA_mutation_representation_filt$Hugo_Symbol)
for (gene in splicing_factor_genes){
  combined_gene_metric_per_sample_all[,gene]<-"WT"
  TCGA_mutation_representation_filt_small <- TCGA_mutation_representation_filt[TCGA_mutation_representation_filt$Hugo_Symbol==gene,]
  combined_gene_metric_per_sample_all[combined_gene_metric_per_sample_all$sample %in% TCGA_mutation_representation_filt_small$sample_ID,gene]<-"MUT"
}

for (cancer in cancers){
    combined_gene_metric_per_sample_all_small <- combined_gene_metric_per_sample_all[combined_gene_metric_per_sample_all$cancer==cancer,]
    my_comparisons=list(c("WT","MUT"))
    
    MUT_VALS<-length(which(combined_gene_metric_per_sample_all_small$SF3B1=="MUT"))
    WT_VALS<-length(which(combined_gene_metric_per_sample_all_small$SF3B1=="WT"))
    if (MUT_VALS>0 & WT_VALS>0){
      wixoc_test_val <- compare_means(gene_metric_av ~ SF3B1,comparisons = my_comparisons, p.adjust.method = "BH", data=combined_gene_metric_per_sample_all_small)
      print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=SF3B1))+
      geom_boxplot(fill="grey")+
        stat_pvalue_manual(wixoc_test_val, 
    y.position = max(combined_gene_metric_per_sample_all_small$gene_metric_av)+(max(combined_gene_metric_per_sample_all_small$gene_metric_av)/2),
    label = "p.adj" )+
      labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
        ylab("splicing antigenicity"))
    } else {
      print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=SF3B1))+
      geom_boxplot(fill="grey")+
      labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
        ylab("splicing antigenicity"))
    }

    MUT_VALS<-length(which(combined_gene_metric_per_sample_all_small$U2AF1=="MUT"))
    WT_VALS<-length(which(combined_gene_metric_per_sample_all_small$U2AF1=="WT"))
    if (MUT_VALS>0 & WT_VALS>0){
      wixoc_test_val <- compare_means(gene_metric_av ~ U2AF1,comparisons = my_comparisons, p.adjust.method = "BH", data=combined_gene_metric_per_sample_all_small)
      print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=U2AF1))+
      geom_boxplot(fill="grey")+
        stat_pvalue_manual(wixoc_test_val, 
    y.position = max(combined_gene_metric_per_sample_all_small$gene_metric_av)+(max(combined_gene_metric_per_sample_all_small$gene_metric_av)/2),
    label = "p.adj" )+
      labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
        ylab("splicing antigenicity"))
    } else {
      print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=U2AF1))+
      geom_boxplot(fill="grey")+
      labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
        ylab("splicing antigenicity"))
    }

        
    MUT_VALS<-length(which(combined_gene_metric_per_sample_all_small$SRSF2=="MUT"))
    WT_VALS<-length(which(combined_gene_metric_per_sample_all_small$SRSF2=="WT"))
    if (MUT_VALS>0 & WT_VALS>0){
      wixoc_test_val <- compare_means(gene_metric_av ~ SRSF2,comparisons = my_comparisons, p.adjust.method = "BH", data=combined_gene_metric_per_sample_all_small)
      print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=SRSF2))+
        geom_boxplot(fill="grey")+
        stat_pvalue_manual(wixoc_test_val, 
    y.position = max(combined_gene_metric_per_sample_all_small$gene_metric_av)+(max(combined_gene_metric_per_sample_all_small$gene_metric_av)/2),
    label = "p.adj" )+
      labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
        ylab("splicing antigenicity"))
    } else {
      print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=SRSF2))+
      geom_boxplot(fill="grey")+
      labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
        ylab("splicing antigenicity"))
    }
    
    MUT_VALS<-length(which(combined_gene_metric_per_sample_all_small$RBM10=="MUT"))
    WT_VALS<-length(which(combined_gene_metric_per_sample_all_small$RBM10=="WT"))
    if (MUT_VALS>0 & WT_VALS>0){
      wixoc_test_val <- compare_means(gene_metric_av ~ RBM10,comparisons = my_comparisons, p.adjust.method = "BH", data=combined_gene_metric_per_sample_all_small)
      print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=RBM10))+
      geom_boxplot(fill="grey")+
        stat_pvalue_manual(wixoc_test_val,
    y.position = max(combined_gene_metric_per_sample_all_small$gene_metric_av)+(max(combined_gene_metric_per_sample_all_small$gene_metric_av)/2),
    label = "p.adj" )+
      labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
        ylab("splicing antigenicity"))
    } else {
      print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=RBM10))+
      geom_boxplot(fill="grey")+
      labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
        ylab("splicing antigenicity"))
    }
    
}

Evaluating the 119 splicing factor genes in the splicing paper manner

splicing_factor_genes<-read_excel("/media/theron/One_Touch/TCGA_junctions/ext_dat/splicing_factor_genes1.xlsx")
splice_maf_file <- read.table("/media/theron/One_Touch/TCGA_junctions/TCGA_cancers/JHPCE/filter_maf/output/filenames.txt",header=F)

for (i in seq(nrow(splice_maf_file))){
  file<-splice_maf_file[i,1]
  gene <- unlist(strsplit(basename(file),"_"))[1]
  gene_maf_dat <- read.table(file,header=T,sep="\t",quote="")
  gene_maf_dat_filt<-gene_maf_dat[gene_maf_dat$Hugo_Symbol==gene,]
  if (i==1){
    splice_maf_data <- gene_maf_dat_filt
  } else {
    splice_maf_data <- rbind(splice_maf_data,gene_maf_dat_filt)
  }
}

saveRDS(splice_maf_data,file="/media/theron/One_Touch/TCGA_junctions/TCGA_cancers/JHPCE/filter_maf/output/splicing_factor_data.maf.rds")
write.table(splice_maf_data,file="/media/theron/One_Touch/TCGA_junctions/TCGA_cancers/JHPCE/filter_maf/output/splicing_factor_data.maf",col.names=T,sep="\t",quote=F)
# Ben Ho Park genes: SF3B1, U2AF1 or SRSF2
splice_maf_data <- readRDS("/media/theron/One_Touch/TCGA_junctions/TCGA_cancers/JHPCE/filter_maf/output/splicing_factor_data.maf.rds")
splice_maf_data$sample_ID <- TCGAbarcode(splice_maf_data$Tumor_Sample_Barcode,sample = T)
splice_maf_data_filt <- splice_maf_data[splice_maf_data$sample_ID %in% combined_gene_metric_per_sample_all$sample,]
splicing_factor_genes <- c(unique(splice_maf_data$Hugo_Symbol),"RBM39")
cancers <- unique(combined_gene_metric_per_sample_all$cancer)
for (gene in splicing_factor_genes){
  combined_gene_metric_per_sample_all[,gene]<-"WT"
  splice_maf_data_filt_small <- splice_maf_data_filt[splice_maf_data_filt$Hugo_Symbol==gene,]
  combined_gene_metric_per_sample_all[combined_gene_metric_per_sample_all$sample %in% splice_maf_data_filt_small$sample_ID,gene]<-"MUT"
}
combined_gene_metric_per_sample_all[,"all_genes"]<-"WT"
combined_gene_metric_per_sample_all[combined_gene_metric_per_sample_all$sample %in% splice_maf_data_filt$sample_ID,"all_genes"]<-"MUT"

all_comps_df <- data.frame(cancer=cancers)
rownames(all_comps_df) <- cancers

for (i in seq(length(cancers))){
  print(i)
  cancer <- cancers[i]
  combined_gene_metric_per_sample_all_small <- combined_gene_metric_per_sample_all[combined_gene_metric_per_sample_all$cancer==cancer,]
  my_comparisons=list(c("WT","MUT"))

  MUT_VALS<-length(which(combined_gene_metric_per_sample_all_small$all_genes=="MUT"))
  WT_VALS<-length(which(combined_gene_metric_per_sample_all_small$all_genes=="WT"))
  if (MUT_VALS>0 & WT_VALS>0){
    wixoc_test_val <- compare_means(gene_metric_av ~ all_genes,comparisons = my_comparisons, p.adjust.method = "BH", data=combined_gene_metric_per_sample_all_small)
    print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=all_genes))+
    geom_boxplot(fill="grey")+
      stat_pvalue_manual(wixoc_test_val,
  y.position = max(combined_gene_metric_per_sample_all_small$gene_metric_av)+(max(combined_gene_metric_per_sample_all_small$gene_metric_av)/2),
  label = "p.adj" )+
    labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
      ylab("splicing antigenicity")+xlab("all genes"))
  } else {
    print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=all_genes))+
    geom_boxplot(fill="grey")+
    labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
      ylab("splicing antigenicity")+xlab("all genes"))
  }
    
  for (j in seq(length(splicing_factor_genes))){
    gene <- splicing_factor_genes[j]
    gene_metric_av_gene_df <- data.frame(splicing_antigenicity=combined_gene_metric_per_sample_all_small$gene_metric_av,
                                   gene=combined_gene_metric_per_sample_all_small[,gene])
    if (length(unique(gene_metric_av_gene_df$gene))==1){
      a<-c(rep(NA,8),cancer,gene,NA,
           length(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="WT"]),
                  length(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="MUT"]))
    } else {
      a<-compare_means(splicing_antigenicity~gene,data=gene_metric_av_gene_df)
      a$cancer <- cancer
      a$gene <- gene
      a$mut_ov_wt <- mean(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="MUT"],na.rm=T)/mean(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="WT"],na.rm=T)
      a$WT_NUM <- length(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="WT"])
      a$MUT_NUM <- length(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="MUT"])
      a<-data.frame(a)
      
    }
    if (i == 1){
      all_comps <- a
    } else {
      all_comps <- rbind(all_comps,a)
    }
  }
}
## [1] 1

## [1] 2

## [1] 3

## [1] 4

## [1] 5

## [1] 6

## [1] 7

## [1] 8

## [1] 9

## [1] 10

## [1] 11

## [1] 12

## [1] 13

## [1] 14

## [1] 15

for (j in seq(length(splicing_factor_genes))){
  gene <- splicing_factor_genes[j]
  gene_metric_av_gene_df <- data.frame(splicing_antigenicity=combined_gene_metric_per_sample_all$gene_metric_av,
                                 gene=combined_gene_metric_per_sample_all[,gene])
  if (length(unique(gene_metric_av_gene_df$gene))==1){
    a<-rep(NA,11)
  } else {
    a<-compare_means(splicing_antigenicity~gene,data=gene_metric_av_gene_df)
    a$cancer <- "all"
    a$gene <- gene
    a$mut_ov_wt <- mean(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="MUT"],na.rm=T)/mean(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="WT"],na.rm=T)
    a$WT_NUM <- length(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="WT"])
    a$MUT_NUM <- length(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="MUT"])
    a<-data.frame(a)
  }
  if (j == 1){
    all_comps_all_samples <- a
  } else {
    all_comps_all_samples <- rbind(all_comps_all_samples,a)
  }
}


# all_comps_filt <- all_comps[all_comps$p<=0.05,]
# all_comps_all_samples_filt <- all_comps_all_samples[all_comps_all_samples$p<=0.05,]

write.table(all_comps,file="/media/theron/One_Touch/TCGA_junctions/TCGA_cancers/JHPCE/filter_maf/output/all_comps.txt",col.names=T,quote=F,sep="\t")
write.table(all_comps_all_samples,file="/media/theron/One_Touch/TCGA_junctions/TCGA_cancers/JHPCE/filter_maf/output/all_comps_all_samples.txt",col.names=T,quote=F,sep="\t")

All comparisons filtered

datatable(all_comps)

All comparisons with conglomerate samples and filtered

datatable(all_comps_all_samples)

Analyzing Splicing Factor Mutations per cancer type excluding missense mutations

# Reading in supplementary data

TCGA_mutation_representation <- read_excel("/media/theron/One_Touch/splicing_factor_paper/NIHMS958968-supplement-3.xlsx",skip=2)
TCGA_mutation_representation$sample_ID <- TCGAbarcode(TCGA_mutation_representation$Tumor_Sample_Barcode,sample = T)
TCGA_mutation_representation_filt <- TCGA_mutation_representation[TCGA_mutation_representation$sample_ID %in% combined_gene_metric_per_sample_all$sample & TCGA_mutation_representation$Variant_Classification != "Missense_Mutation",]
cancers <- unique(TCGA_mutation_representation_filt$Cohort)
splicing_factor_genes <- unique(TCGA_mutation_representation_filt$Hugo_Symbol)
for (gene in splicing_factor_genes){
  combined_gene_metric_per_sample_all[,gene]<-"WT"
  TCGA_mutation_representation_filt_small <- TCGA_mutation_representation_filt[TCGA_mutation_representation_filt$Hugo_Symbol==gene,]
  combined_gene_metric_per_sample_all[combined_gene_metric_per_sample_all$sample %in% TCGA_mutation_representation_filt_small$sample_ID,gene]<-"MUT"
}

for (cancer in cancers){
    combined_gene_metric_per_sample_all_small <- combined_gene_metric_per_sample_all[combined_gene_metric_per_sample_all$cancer==cancer,]
    my_comparisons=list(c("WT","MUT"))
    
    MUT_VALS<-length(which(combined_gene_metric_per_sample_all_small$SF3B1=="MUT"))
    WT_VALS<-length(which(combined_gene_metric_per_sample_all_small$SF3B1=="WT"))
    if (MUT_VALS>0 & WT_VALS>0){
      wixoc_test_val <- compare_means(gene_metric_av ~ SF3B1,comparisons = my_comparisons, p.adjust.method = "BH", data=combined_gene_metric_per_sample_all_small)
      print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=SF3B1))+
      geom_boxplot(fill="grey")+
        stat_pvalue_manual(wixoc_test_val, 
    y.position = max(combined_gene_metric_per_sample_all_small$gene_metric_av)+(max(combined_gene_metric_per_sample_all_small$gene_metric_av)/2),
    label = "p.adj" )+
      labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
        ylab("splicing antigenicity"))
    } else {
      print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=SF3B1))+
      geom_boxplot(fill="grey")+
      labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
        ylab("splicing antigenicity"))
    }

    MUT_VALS<-length(which(combined_gene_metric_per_sample_all_small$U2AF1=="MUT"))
    WT_VALS<-length(which(combined_gene_metric_per_sample_all_small$U2AF1=="WT"))
    if (MUT_VALS>0 & WT_VALS>0){
      wixoc_test_val <- compare_means(gene_metric_av ~ U2AF1,comparisons = my_comparisons, p.adjust.method = "BH", data=combined_gene_metric_per_sample_all_small)
      print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=U2AF1))+
      geom_boxplot(fill="grey")+
        stat_pvalue_manual(wixoc_test_val, 
    y.position = max(combined_gene_metric_per_sample_all_small$gene_metric_av)+(max(combined_gene_metric_per_sample_all_small$gene_metric_av)/2),
    label = "p.adj" )+
      labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
        ylab("splicing antigenicity"))
    } else {
      print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=U2AF1))+
      geom_boxplot(fill="grey")+
      labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
        ylab("splicing antigenicity"))
    }

        
    MUT_VALS<-length(which(combined_gene_metric_per_sample_all_small$SRSF2=="MUT"))
    WT_VALS<-length(which(combined_gene_metric_per_sample_all_small$SRSF2=="WT"))
    if (MUT_VALS>0 & WT_VALS>0){
      wixoc_test_val <- compare_means(gene_metric_av ~ SRSF2,comparisons = my_comparisons, p.adjust.method = "BH", data=combined_gene_metric_per_sample_all_small)
      print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=SRSF2))+
        geom_boxplot(fill="grey")+
        stat_pvalue_manual(wixoc_test_val, 
    y.position = max(combined_gene_metric_per_sample_all_small$gene_metric_av)+(max(combined_gene_metric_per_sample_all_small$gene_metric_av)/2),
    label = "p.adj" )+
      labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
        ylab("splicing antigenicity"))
    } else {
      print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=SRSF2))+
      geom_boxplot(fill="grey")+
      labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
        ylab("splicing antigenicity"))
    }
    
    MUT_VALS<-length(which(combined_gene_metric_per_sample_all_small$RBM10=="MUT"))
    WT_VALS<-length(which(combined_gene_metric_per_sample_all_small$RBM10=="WT"))
    if (MUT_VALS>0 & WT_VALS>0){
      wixoc_test_val <- compare_means(gene_metric_av ~ RBM10,comparisons = my_comparisons, p.adjust.method = "BH", data=combined_gene_metric_per_sample_all_small)
      print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=RBM10))+
      geom_boxplot(fill="grey")+
        stat_pvalue_manual(wixoc_test_val,
    y.position = max(combined_gene_metric_per_sample_all_small$gene_metric_av)+(max(combined_gene_metric_per_sample_all_small$gene_metric_av)/2),
    label = "p.adj" )+
      labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
        ylab("splicing antigenicity"))
    } else {
      print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=RBM10))+
      geom_boxplot(fill="grey")+
      labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
        ylab("splicing antigenicity"))
    }
    
}

Evaluating the 119 splicing factor genes in the splicing paper manner

splicing_factor_genes<-read_excel("/media/theron/One_Touch/TCGA_junctions/ext_dat/splicing_factor_genes1.xlsx")
splice_maf_file <- read.table("/media/theron/One_Touch/TCGA_junctions/TCGA_cancers/JHPCE/filter_maf/output/filenames.txt",header=F)

for (i in seq(nrow(splice_maf_file))){
  file<-splice_maf_file[i,1]
  gene <- unlist(strsplit(basename(file),"_"))[1]
  gene_maf_dat <- read.table(file,header=T,sep="\t",quote="")
  gene_maf_dat_filt<-gene_maf_dat[gene_maf_dat$Hugo_Symbol==gene,]
  if (i==1){
    splice_maf_data <- gene_maf_dat_filt
  } else {
    splice_maf_data <- rbind(splice_maf_data,gene_maf_dat_filt)
  }
}

saveRDS(splice_maf_data,file="/media/theron/One_Touch/TCGA_junctions/TCGA_cancers/JHPCE/filter_maf/output/splicing_factor_data.maf.rds")
write.table(splice_maf_data,file="/media/theron/One_Touch/TCGA_junctions/TCGA_cancers/JHPCE/filter_maf/output/splicing_factor_data.maf",col.names=T,sep="\t",quote=F)
splice_maf_data <- readRDS("/media/theron/One_Touch/TCGA_junctions/TCGA_cancers/JHPCE/filter_maf/output/splicing_factor_data.maf.rds")
splice_maf_data$sample_ID <- TCGAbarcode(splice_maf_data$Tumor_Sample_Barcode,sample = T)
splice_maf_data_filt <- splice_maf_data[splice_maf_data$sample_ID %in% combined_gene_metric_per_sample_all$sample & splice_maf_data$Variant_Classification != "Missense_Mutation",]

splicing_factor_genes <- unique(splice_maf_data$Hugo_Symbol)
cancers <- unique(combined_gene_metric_per_sample_all$cancer)
for (gene in splicing_factor_genes){
  combined_gene_metric_per_sample_all[,gene]<-"WT"
  splice_maf_data_filt_small <- splice_maf_data_filt[splice_maf_data_filt$Hugo_Symbol==gene,]
  combined_gene_metric_per_sample_all[combined_gene_metric_per_sample_all$sample %in% splice_maf_data_filt_small$sample_ID,gene]<-"MUT"
}
combined_gene_metric_per_sample_all[,"all_genes"]<-"WT"
combined_gene_metric_per_sample_all[combined_gene_metric_per_sample_all$sample %in% splice_maf_data_filt$sample_ID,"all_genes"]<-"MUT"

for (i in seq(length(cancers))){
  print(i)
  cancer <- cancers[i]
  combined_gene_metric_per_sample_all_small <- combined_gene_metric_per_sample_all[combined_gene_metric_per_sample_all$cancer==cancer,]
  my_comparisons=list(c("WT","MUT"))

  MUT_VALS<-length(which(combined_gene_metric_per_sample_all_small$all_genes=="MUT"))
  WT_VALS<-length(which(combined_gene_metric_per_sample_all_small$all_genes=="WT"))
  if (MUT_VALS>0 & WT_VALS>0){
    wixoc_test_val <- compare_means(gene_metric_av ~ all_genes,comparisons = my_comparisons, p.adjust.method = "BH", data=combined_gene_metric_per_sample_all_small)
    print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=all_genes))+
    geom_boxplot(fill="grey")+
      stat_pvalue_manual(wixoc_test_val,
  y.position = max(combined_gene_metric_per_sample_all_small$gene_metric_av)+(max(combined_gene_metric_per_sample_all_small$gene_metric_av)/2),
  label = "p.adj" )+
    labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
      ylab("splicing antigenicity")+xlab("all genes"))
  } else {
    print(ggplot(combined_gene_metric_per_sample_all_small,aes(y=gene_metric_av,x=all_genes))+
    geom_boxplot(fill="grey")+
    labs(title=sprintf("%s: MUT=%d,WT=%d",cancer,MUT_VALS,WT_VALS))+
      ylab("splicing antigenicity")+xlab("all genes"))
  }
    
  for (j in seq(length(splicing_factor_genes))){
    gene <- splicing_factor_genes[j]
    gene_metric_av_gene_df <- data.frame(splicing_antigenicity=combined_gene_metric_per_sample_all_small$gene_metric_av,
                                   gene=combined_gene_metric_per_sample_all_small[,gene])
    if (length(unique(gene_metric_av_gene_df$gene))==1){
      a<-rep(NA,11)
    } else {
      a<-compare_means(splicing_antigenicity~gene,data=gene_metric_av_gene_df)
      a$cancer <- cancer
      a$gene <- gene
      a$mut_ov_wt <- mean(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="MUT"],na.rm=T)/mean(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="WT"],na.rm=T)
      a$WT_NUM <- length(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="WT"])
      a$MUT_NUM <- length(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="MUT"])
      a<-data.frame(a)
      
    }
    if (i == 1){
      all_comps <- a
    } else {
      all_comps <- rbind(all_comps,a)
    }
  }
}
## [1] 1

## [1] 2

## [1] 3

## [1] 4

## [1] 5

## [1] 6

## [1] 7

## [1] 8

## [1] 9

## [1] 10

## [1] 11

## [1] 12

## [1] 13

## [1] 14

## [1] 15

for (j in seq(length(splicing_factor_genes))){
  gene <- splicing_factor_genes[j]
  gene_metric_av_gene_df <- data.frame(splicing_antigenicity=combined_gene_metric_per_sample_all$gene_metric_av,
                                 gene=combined_gene_metric_per_sample_all[,gene])
  if (length(unique(gene_metric_av_gene_df$gene))==1){
    a<-rep(NA,11)
  } else {
    a<-compare_means(splicing_antigenicity~gene,data=gene_metric_av_gene_df)
    a$cancer <- "all"
    a$gene <- gene
    a$mut_ov_wt <- mean(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="MUT"],na.rm=T)/mean(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="WT"],na.rm=T)
    a$WT_NUM <- length(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="WT"])
    a$MUT_NUM <- length(gene_metric_av_gene_df$splicing_antigenicity[gene_metric_av_gene_df$gene=="MUT"])
    a<-data.frame(a)
  }
  if (j == 1){
    all_comps_all_samples <- a
  } else {
    all_comps_all_samples <- rbind(all_comps_all_samples,a)
  }
}

all_comps_filt <- all_comps[all_comps$p<=0.05,]
all_comps_all_samples_filt <- all_comps_all_samples[all_comps_all_samples$p<=0.05,]


write.table(all_comps,file="/media/theron/One_Touch/TCGA_junctions/TCGA_cancers/JHPCE/filter_maf/output/all_comps_nomissense.txt",col.names=T,quote=F,sep="\t")
write.table(all_comps_all_samples,file="/media/theron/One_Touch/TCGA_junctions/TCGA_cancers/JHPCE/filter_maf/output/all_comps_all_samples_nomissense.txt",col.names=T,quote=F,sep="\t")

All comparisons filtered

datatable(all_comps)

All comparisons with conglomerate samples and filtered

datatable(all_comps_all_samples)

Cell Proportions all samples

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=B.cells.naive))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=B.cells.memory))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=Plasma.cells))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=T.cells.CD8))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=T.cells.CD4.naive))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=T.cells.CD4.memory.resting))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=T.cells.CD4.memory.activated))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=T.cells.follicular.helper))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=T.cells.regulatory..Tregs.))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=T.cells.gamma.delta))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=NK.cells.resting))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=NK.cells.activated))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=Monocytes))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df,aes(x=gene_metric_av,y=Macrophages.M0))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=Macrophages.M1))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=Macrophages.M2))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=Dendritic.cells.resting))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=Dendritic.cells.activated))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=Mast.cells.resting))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=Mast.cells.activated))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=Eosinophils))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=Neutrophils))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'

  print(ggplot(cibersort_per_samp_df_all,aes(x=gene_metric_av,y=Neutrophils))+
    geom_point()+
    stat_cor(method = "pearson",
             label.x.npc = "left",
             label.y.npc = "top")+
    geom_smooth(method="lm"))
## `geom_smooth()` using formula 'y ~ x'